import scrapy
from ..items import TencentspiderItem

class TencentSpiderSpider(scrapy.Spider):
    """Crawl Tencent careers job postings through the site's JSON API.

    ``parse`` reads a listing page, schedules one detail request per posting
    and follows the listing pagination up to ``max_page``. ``deep`` turns each
    detail response into a ``TencentspiderItem``.
    """

    name = 'tencent_spider'
    # allowed_domains = ['baidu.com']
    start_urls = ['https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1692602014404&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=40001&attrId=&keyword=&pageIndex=1&pageSize=10&language=zh-cn&area=cn']
    # Detail endpoint; ``{}`` is filled with a posting's PostId, e.g.
    # 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp=1692603085073&postId=1650020078734811136&language=zh-cn'
    format_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp=1692603085073&postId={}&language=zh-cn'
    # Listing endpoint used for follow-up pages; ``{}`` is filled with the pageIndex.
    base_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1692604652781&countryId=&cityId=&bgIds=&productId=&categoryId=40001001,40001002,40001003,40001004,40001005,40001006&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'
    # Index of the listing page most recently requested; advanced by ``parse``.
    page = 1
    # Last listing page that will be requested.
    max_page = 5

    def parse(self, response):
        """Handle one listing page.

        Yields a detail request for every posting on the page (carrying the
        posting name to ``deep`` via ``cb_kwargs``) and, while ``self.page``
        is below ``max_page``, a request for the next listing page.
        """
        payload = response.json()
        # Guard against a null ``Posts`` value so iteration does not raise
        # TypeError (assumption: the API may return null for an empty page).
        posts = payload['Data']['Posts'] or []
        for post in posts:
            yield scrapy.Request(
                url=self.format_url.format(post['PostId']),
                callback=self.deep,
                cb_kwargs={'a': post['RecruitPostName']},
            )

        # Report the page that was just parsed, before the counter advances;
        # previously the message was printed after the increment and so named
        # the next page, and the final page was never reported.
        finished_page = self.page
        print(f'第{finished_page}已经爬完')

        if finished_page < self.max_page:
            self.page = finished_page + 1
            yield scrapy.Request(url=self.base_url.format(self.page), callback=self.parse)

    def deep(self, response, **kwargs):
        """Build an item from a posting's detail response.

        ``kwargs['a']`` is the posting name forwarded by ``parse``; the
        responsibility and requirement texts are read from the JSON body.
        """
        detail = response.json()['Data']
        item = TencentspiderItem()
        item['Responsibility'] = detail['Responsibility']
        item['Requirement'] = detail['Requirement']
        item['name'] = kwargs['a']
        yield item

# Reference URLs kept as bare string literals; as expression statements they
# have no runtime effect. The first two lines are identical, and the third is
# the spider's listing URL (base_url) with pageIndex=2.
'https://careers.tencent.com/m/search.html?pcid=40001'
'https://careers.tencent.com/m/search.html?pcid=40001'
'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1692604652781&countryId=&cityId=&bgIds=&productId=&categoryId=40001001,40001002,40001003,40001004,40001005,40001006&parentCategoryId=&attrId=&keyword=&pageIndex=2&pageSize=10&language=zh-cn&area=cn'